R Markdown

This is an R Markdown document. In this document, I presented the relationship between vaccinated rate between income level, incentive programs implemented by local government and incentive program mentions from local health deparment’s twitter accounts. This document showcases an perfect example to under social issues/problems by leveraging multiple data sources (e.g. administrative data, U.S. census and Twitter)

##load packages

library(tidycensus)
## Warning: package 'tidycensus' was built under R version 3.6.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.2     ✓ dplyr   1.0.6
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 3.6.2
## Warning: package 'tibble' was built under R version 3.6.2
## Warning: package 'tidyr' was built under R version 3.6.2
## Warning: package 'readr' was built under R version 3.6.2
## Warning: package 'purrr' was built under R version 3.6.2
## Warning: package 'dplyr' was built under R version 3.6.2
## Warning: package 'forcats' was built under R version 3.6.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Warning: package 'sf' was built under R version 3.6.2
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(mapview)
## Warning: package 'mapview' was built under R version 3.6.2
## Warning: no function found corresponding to methods exports from 'raster' for:
## 'wkt'
library(leafem)
## Warning: package 'leafem' was built under R version 3.6.2
options(tigris_use_cache = TRUE)

Apply for Census’s API

# here is the link at apply for census API
## http://api.census.gov/data/key_signup.html.

census_api_key("587741d263a5873b848cd00efcf48d53f17156df")
## To install your API key for use in future sessions, run this function with `install = TRUE`.

load ACS 2019

extract total population estimates at county level

md_2019_pop<-get_acs(geography = "county",
                     variables = "B01003_001",
                     state = "MD",
                     year = 2019,
                     geometry = TRUE)
## Getting data from the 2015-2019 5-year ACS

extract median income estimates at county level

md_2019_inc<-get_acs(geography = "county",
                     variables = "B21004_001",
                     state = "MD",
                     year = 2019,
                     geometry = TRUE)
## Getting data from the 2015-2019 5-year ACS

draw a map

draw a map with population estimates and median income

mymap<-mapview(md_2019_pop,
               zcol = "estimate", 
               homebutton = FALSE) +
        mapview(md_2019_inc,
                zcol = "estimate",
                homebutton = FALSE)
mymap

load policy data (including county-level vaccination rate, incentive equity programs)

md_vax <- read_csv("md_vax.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   GEOID = col_double(),
##   county = col_character(),
##   equity_transportation = col_double(),
##   equity_time = col_double(),
##   equity_special_needs = col_double(),
##   equity_walk_ins = col_double(),
##   incentives_business = col_double(),
##   incentives_individual = col_double(),
##   election = col_character(),
##   at_least_one = col_double(),
##   fully_vaccinated = col_double(),
##   age_12_at_least_one = col_double(),
##   age_12_fully = col_double(),
##   age_18_at_least_one = col_double(),
##   age_18_fully = col_double(),
##   hesitancy_level = col_double()
## )

creating a variable called equity scores

md_vax<-md_vax %>%
        mutate(equity_scores = equity_special_needs + equity_time + equity_transportation + equity_walk_ins+incentives_business + incentives_individual)

extract dataframes of vax rate & equity scores

md_vaxrate<-md_vax %>%
        select(GEOID, at_least_one)



md_equity<-md_vax %>%
        select(GEOID, equity_scores)

extract geo coordinates from the sf file extracted from tidycensus

md_geo<-md_2019_pop %>%
        select(GEOID, geometry)

add vaxrate, equity program data to geo dataframe

md_geo_vaxrate <- merge(md_geo, md_vaxrate,by = "GEOID")

md_geo_equity<-merge(md_geo, md_equity, by = "GEOID")

Prepare twitter text data and load it on the map

load("/Users/haoshu/Desktop/SICSS/group project/countyvaxtweets_geoid.rda")
head(countyvaxtweets_geoid)

extract text of tweets from the twitter dataframe

md_tweet<-countyvaxtweets_geoid %>%
        select(GEOID, county, text)

create a dictionary of incentive program mentions

incentive<-"grocery | groceries | free | giveaway | prize | food | drink | concert | festival | ice | lottery | cash | reward | beer | Juneteenth | summer | vaxcash | campaign | incentive | stadium | ticket | mobile" 

## create a new flag variable indicating whether the tweets have incentives mentions
for (i in 1:nrow(md_tweet)) {
        md_tweet$isinctv[i]<-grepl(incentive, md_tweet$text[i])
}  

covert the logical variable (isinctv) to a numeric variable

md_tweet$isinctv<-as.numeric(md_tweet$isinctv)
class(md_tweet$isinctv)
## [1] "numeric"

calculate the total number of incentive mentions by each county

num_inctv<-md_tweet %>%
        group_by(GEOID) %>%
        summarise ( 
                count_inctv= sum (isinctv, na.rm = TRUE)
        )
head(num_inctv)

merge the geo coordinates to the geo tweets dataframe

md_geo_twt<-merge(md_geo, num_inctv, by = "GEOID", all.x = TRUE, all.y = TRUE)

# replace NA with "0"s.  
md_geo_twt$count_inctv[is.na(md_geo_twt$count_inctv)]<-0

plot vax rate as dots on the map

library(RColorBrewer)
par(mar=c(3,4,2,2))
display.brewer.all()

plot a map indicating vaccinated rates by each county

dot_vax<-st_centroid(md_geo_vaxrate) ## this will convert the vax rate into dots
## Warning in st_centroid.sf(md_geo_vaxrate): st_centroid assumes attributes are
## constant over geometries of x
## Warning in st_centroid.sfc(st_geometry(x), of_largest_polygon =
## of_largest_polygon): st_centroid does not give correct centroids for longitude/
## latitude data
map_dotvax<-mapview(dot_vax, 
                    zcol = "at_least_one", 
                    cex="at_least_one", 
                    col.regions =brewer.pal(9, "YlGnBu"),
                    homebutton = FALSE,
                    layer.name = "vaccination rate (at least one dose)")
## Warning: Found less unique colors (9) than unique zcol values (18)! 
## Interpolating color vector to match number of zcol values.
map_dotvax

plot a map with number of equity programs

map_equity<-mapview(md_geo_equity,
        zcol = "equity_scores",
        homebutton = FALSE,
        col.regions =brewer.pal(9, "Oranges"),
        layer.name = "number of equity/incentive programs") 
## Warning: Found more unique colors (9) than unique zcol values (5)! 
## Trimming color vector to match number of zcol values.
map_equity

plot a map with equity layer and a map with vaccinated rate layer

map_equity + map_dotvax

plot a map with median income and a map with vaccinated rate

map_inc<-mapview(md_2019_inc,
        zcol = "estimate",
        col.regions =brewer.pal(9, "YlGn"),
                    homebutton = FALSE,
                    layer.name = "median income level")
## Warning: Found less unique colors (9) than unique zcol values (24)! 
## Interpolating color vector to match number of zcol values.
## layer up vaccination rate with median income
map_inc + map_dotvax

plot a map with twitter mentions and a map with vaccinated rate

map_twt<-mapview(md_geo_twt,
                 zcol = "count_inctv", 
                 homebutton = FALSE, 
                 col.regions = brewer.pal(9, "PuRd"),
                 layer.name = "incentive mentions on twitter")

map_twt + map_dotvax

plot a map with 2020 election results and a map with vaccinated rate

# create a election dataframe with geo coordinates
md_elect<-md_vax %>%
        select(GEOID, county, election) 
md_elect$election[md_elect$election=="Biden"]<-1
md_elect$election[md_elect$election=="Trump"]<-0
md_elect$election<-factor(md_elect$election, levels = c(0,1), labels = c("Trump", "Biden"))

# merge with geo coodinates
md_geo_elect<-merge(md_geo, md_elect, by = "GEOID")


# plot a map of election results in MD by county
map_elect<-mapview(md_geo_elect,
                   zcol = "election",
                   homebutton = FALSE,
                   col.regions = brewer.pal(9, "Spectral"),
                   layer.name = "election results in 2020, red-Trump, blue-Biden")

map_elect 
# layer up with vaccined rates 
map_elect + map_dotvax